# Fuel price data: R code for generating tables and graphics for
# "Global progress and backsliding on gasoline taxes and subsidies"
# Nature Energy 2, 16201 (2017)
# 
# Created by Paasha Mahdavi
# This version: 6 Jan 2017
#
#-----------------------------------------------------------------------------#
#    Loading packages and data                                                #
#-----------------------------------------------------------------------------#


if(!require("ggplot2")) install.packages("ggplot2")
if(!require("gridExtra")) install.packages("gridExtra")
if(!require("scales")) install.packages("scales")
if(!require("xtable")) install.packages("xtable")
if(!require("tseries")) install.packages("tseries")
if(!require("grid")) install.packages("grid")
if(!require("zoo")) install.packages("zoo")
if(!require("ggrepel")) install.packages("ggrepel")
if(!require("foreign")) install.packages("foreign")
if(!require("dplyr")) install.packages("dplyr")

# Load data
data <- read.dta("gasoline-price-data.dta")
data$date <- as.Date(paste(data$month,"-01-",data$year,sep=""),"%m-%d-%Y")





#------------------------------------------------------------------------------#
#   Main text analysis, plots and tables                                       #
#------------------------------------------------------------------------------#

#------------------------------------------------------------------------------#
#   Figure 1                                                                   #
#   Gasoline prices by country and benchmark price trends over time            #
#------------------------------------------------------------------------------#

# Prices over time by country updated benchmark
#   Congo Dem Rep is excluded for visual purposes 
#   (high spike in 2001 due to changes in Xrate)

pdf(file="FIG1-countryprices.pdf",width=6.5,height=4.5)
ggplot(data[data$wb_code!="ZAR",],aes(x=date,y=price_usd_2015)) + 
  geom_line(aes(group=country),col="darkgray", size = 0.25) + 
  geom_line(data=data[data$wb_code=="AFG",],
            aes(x=date,y=benchmark_2015_adj), size=0.5, col="red") + 
  scale_x_date(breaks = date_breaks("1 year"),
               minor_breaks=date_breaks("6 months"),
               labels = date_format("%Y"),
               limits=c(as.Date("2003-01-01"),as.Date("2015-10-01"))) + 
  scale_y_continuous(breaks=seq(0,3.25,.5), 
                     labels=c("0.00","0.50","1.00","1.50","2.00","2.50","$3.00")) + 
  labs(x="\n Year",y="Gasoline price (constant 2015 USD per liter) \n") + 
  coord_cartesian(ylim=c(-0.05,3.25)) +
  theme(panel.grid.major = element_blank(), 
        panel.grid.minor = element_blank(),
        panel.background = element_blank(),
        axis.line.x = element_line(colour = 'black', size=0.5, linetype='solid'), 
        axis.line.y = element_line(colour = 'black', size=0.5, linetype='solid'), 
        axis.text.x = element_text(size=8), 
        axis.text.y = element_text(size=8), 
        title = element_text(size=9))
dev.off()





#------------------------------------------------------------------------------#
#   Figure 2                                                                   #
#   Implicit taxes and subsidies by country in 2003 versus 2015                #
#------------------------------------------------------------------------------#

# Creating bmg plot comparison data
bmgplot <- data.frame(wb_code = data$wb_code[data$bookends=="Begin"], 
                      country.short = data$country_short[data$bookends=="Begin"], 
                      bmgap03 = data$bmgap2015adj[data$bookends=="Begin"],
                      bmgap15 = data$bmgap2015adj[data$bookends=="End"], 
                      cons = 10^(data$cons[data$bookends=="End"]),
                      emissions = data$emissions[data$bookends=="End"])

bmgplot <- summarize(group_by(bmgplot,wb_code), 
                     bmgap03 = mean(bmgap03,na.rm=T), 
                     bmgap15 = mean(bmgap15,na.rm=T), 
                     cons = mean(cons,na.rm=T),
                     emissions = mean(emissions,na.rm=T),
                     country.short = unique(country.short,na.rm=T))

# Net change between 1st half 2015 and 1st half 2003
bmgplot$change <- bmgplot$bmgap15-bmgplot$bmgap03

# Coloring quadrants according to positive v negative net change
bmgplot$quadrant2 <- with(bmgplot,
                          ifelse(!is.na(bmgap15)&!is.na(bmgap03),
                            ifelse(change>=0,1,2),
                            NA)
)

# Quadrant colors
cbbPalette <- c("#0072B2", "#D55E00")


# 2003 vs 2015 benchmark gap comparison 
pdf(file="FIG2-bmg2003v2015.pdf",width=13,height=11)
ggplot(bmgplot, aes(x=bmgap03, y=bmgap15, size=cons, label=country.short,
                    col = factor(quadrant2))) + 
  geom_hline(yintercept=0, linetype="longdash", col="gray") + 
  geom_vline(xintercept=0, linetype="longdash", col="gray") + 
  geom_abline(intercept = 0, slope = 1, col = "darkgray", linetype = "longdash", size = 1) + 
  geom_text_repel() + 
  scale_size(range=c(3,6.5),name="Gasoline consumption \n (thou. bbls per day)",
             breaks=c(100,1000,9000),labels=c(100,1000,10000)) + 
  scale_x_continuous(breaks=seq(-0.25,1.25,0.25)) + 
  scale_y_continuous(breaks=seq(-0.5,1.5,0.25)) + 
  scale_colour_manual(values=cbbPalette) +
  labs(y="Distance from benchmark in 1st half 2015 (constant 2015 USD per litre)",
       x="\n Distance from benchmark in 1st half 2003 (constant 2015 USD per litre)") + 
  coord_cartesian(ylim = c(-0.6, 1.5), xlim = c(-0.4, 1.3)) + 
  theme(legend.position=c(0.85,0.1),
        panel.grid.major = element_blank(), 
        panel.grid.minor = element_blank(),
        panel.background = element_blank(),
        axis.line.x = element_line(colour = 'black', size=0.5, linetype='solid'), 
        axis.line.y = element_line(colour = 'black', size=0.5, linetype='solid'), 
        axis.text.x = element_text(size=16), 
        axis.text.y = element_text(size=16),
        axis.title.x = element_text(size=18), 
        axis.title.y = element_text(size=18)) +
  guides(col=FALSE)
dev.off()





#------------------------------------------------------------------------------#
#   Figure 3                                                                   #
#   Implicit taxes and subsidies over time                                     #
#------------------------------------------------------------------------------#

# Restricting to countries with price data in Jan-Jun 2003 and Jan-Jun 2015
bk <- summarize(
  group_by(data %>% filter(bookends=="Begin"|bookends=="End",
                           !is.na(price_usd_2015)),
           wb_code,bookends), 
  price = mean(price_usd_2015, na.rm = T)
)

bka <- summarize(group_by(bk,wb_code), nums=n())

bksf <- bka %>% filter(nums==2) %>% select(wb_code)

# Summarizing by date
wgt <- summarize(
  group_by(
    data %>% filter(date %in% (as.Date("2003-01-01"):as.Date("2015-06-01")),
                         wb_code %in% bksf$wb_code), 
    date), 
  meanprice = mean(price_usd_2015, na.rm=T), 
  weightedmeanprice = sum(conspct*price_usd_2015, na.rm=T), 
  benchmark = median(benchmark_2015_adj,na.rm=T), 
  check = sum(conspct, na.rm=T), 
  meantax = meanprice - benchmark, 
  wgtmeantax = weightedmeanprice - benchmark, 
  meanpctbm = mean(pctbmgap2015adj, na.rm=T), 
  wgtmeanpctbm = sum(conspct*pctbmgap2015adj, na.rm=T), 
  bookends = unique(bookends), 
  year = unique(year)
)


# Benchmark gap over time by country with mean pricess, weighted & unweighted
pdf(file = "FIG3-weightedbmg.pdf",width=6.5,height=4.5)
ggplot() + 
  geom_line(data = data[data$wb_code!="ZAR",], 
            aes(x = date, y = bmgap2015adj, group = wb_code),
            col = "darkgray", size = 0.25) +
  geom_line(data = data.frame(wgt), aes(x = date, y = meantax),
            col = "#663300", size = 0.75) + 
  labs(x = "\n Year", 
       y = "Distance between benchmark and local price \n (constant 2015 USD per liter) \n") + 
  scale_x_date(breaks = date_breaks("1 year"),
               minor_breaks = date_breaks("6 months"),
               labels = date_format("%Y"),
               limits = c(as.Date("2003-01-01"), as.Date("2015-10-01"))) + 
  scale_y_continuous(breaks = seq(-1,2,0.5),
                     labels = c("-1.00","-0.50","0.00","0.50","1.00","1.50",
                                "$2.00")) + 
  geom_line(data = data.frame(wgt), aes(x = date, y = wgtmeantax), 
            col = "#000066",size = 0.75) + 
  geom_hline(yintercept = 0, linetype="dashed",size = 1, alpha = 0.5) + 
  theme(panel.grid.major = element_blank(), 
        panel.grid.minor = element_blank(),
        panel.background = element_blank(),
        axis.line.x = element_line(colour = 'black', size=0.5, linetype = 'solid'), 
        axis.line.y = element_line(colour = 'black', size=0.5, linetype = 'solid'), 
        axis.text.x = element_text(size=8), 
        axis.text.y = element_text(size=8), 
        title = element_text(size=9)) +
  coord_cartesian(ylim = c(-1,2.2))
dev.off()





#------------------------------------------------------------------------------#
#   Figure 4                                                                   #
#   Implicit taxes and subsidies by region over time                           #
#------------------------------------------------------------------------------#

# By region over time
regional <- summarize(
  group_by(
    data[data$wb_code!="ZAR",],
    region2,date), 
  benchgap15adj = mean(bmgap2015adj,na.rm=T)
)


# Benchmark gap over time by region with benchmark
pdf(file="FIG4-bmgap-region.pdf",width=6.5,height=4.5)
ggplot(regional, aes(x=date,y=benchgap15adj)) + 
  geom_line(aes(group=region2,col=factor(region2),
                linetype=factor(region2)), size = 0.5) + 
  geom_hline(yintercept=0,size=0.5,alpha=0.5,linetype="dashed") + 
  scale_x_date(breaks = date_breaks("1 year"),
               minor_breaks=date_breaks("6 months"),
               labels = date_format("%Y"),
               limits=c(as.Date("2003-01-01"),as.Date("2015-10-01"))) + 
  scale_y_continuous(breaks=seq(-05,1.5,.25)) + 
  labs(x="\n Year",y="Distance between benchmark and local price \n (constant 2015 USD per liter)") + 
  scale_colour_manual(name="",
                      breaks=c("Europe + North America","Africa",
                               "Asia + Pacific","Latin America & Caribbean",
                               "Former SSR","Middle East"),
                      values= c( "#CC79A7","#999999", "#E69F00", 
                                 "#000000", "#D55E00", "#0072B2")) + 
  scale_linetype_manual(name="",
                        breaks=c("Europe + North America","Africa",
                                 "Asia + Pacific","Latin America & Caribbean",
                                 "Former SSR","Middle East"),
                        values=c("solid","longdash","solid","solid",
                                 "longdash","longdash")) + 
  theme(legend.position="none",
        panel.grid.major = element_blank(), 
        panel.grid.minor = element_blank(),
        panel.background = element_blank(),
        axis.line.x = element_line(colour = 'black', size=0.5, linetype = 'solid'), 
        axis.line.y = element_line(colour = 'black', size=0.5, linetype = 'solid'), 
        axis.text.x = element_text(size=8), 
        axis.text.y = element_text(size=8), 
        title = element_text(size=9)) +
  coord_cartesian(ylim=c(-0.45,1.45))
dev.off()





#------------------------------------------------------------------------------#
#   Figure 5                                                                   #
#   Implicit taxes and subsidies in the G20                                    #
#------------------------------------------------------------------------------#

# List (excluding EU members not individually represented)
g20 <- c("ARG","AUS","BRA","CAN","CHN","FRA","DEU","IND","IDN","ITA","JPN",
         "KOR","MEX","RUS","SAU","ZAF","TUR","GBR","USA")

# Including 70 months prior to making commitment to reducing FF subsidies
g20.70 <- data %>% 
  filter(wb_code%in%g20)
g20.70.mean <- summarize(
  group_by(g20.70, date), 
  meanprice = mean(price_usd_2015, na.rm=T), 
  meantax = mean(bmgap2015adj, na.rm=T)
)

# Implicit taxes/subsidies plot for G20
pdf(file="FIG5-bmgap-G20.pdf",width=6.5,height=4.5)
ggplot(g20.70, aes(x=date, y=bmgap2015adj)) + 
  geom_line(aes(group=country),col="darkgray", size = 0.25) + 
  geom_line(data = g20.70.mean, aes(x=date,y=meantax), size=.75, col="black") + 
  geom_hline(yintercept=0, size=0.5, col="red") + 
  geom_vline(xintercept=as.numeric(g20.70$date[70]), 
             col="black", size=0.5, linetype="dashed") + 
  scale_x_date(breaks = date_breaks("1 year"),
               minor_breaks = date_breaks("6 months"),
               labels = date_format("%Y"),
               limits=c(as.Date("2003-12-01"),as.Date("2015-06-01"))) + 
  scale_y_continuous(breaks=seq(-1,2.25,.25)) + 
  labs(x="\n Year",
       y="Distance between benchmark and local price \n (constant 2015 USD per liter)") + 
  theme(legend.position="none",
        panel.grid.major = element_blank(), 
        panel.grid.minor = element_blank(),
        panel.background = element_blank(),
        axis.line.x = element_line(colour = 'black', size=0.5, linetype = 'solid'), 
        axis.line.y = element_line(colour = 'black', size=0.5, linetype = 'solid'), 
        axis.text.x = element_text(size=8), 
        axis.text.y = element_text(size=8), 
        title = element_text(size=9)) +
  coord_cartesian(ylim=c(-1,2.3))
dev.off()





#------------------------------------------------------------------------------#
#   Figure 6                                                                   #
#   Price fixity over time, grouped into categories based on frequency of      #
#   changing prices                                                            #
#------------------------------------------------------------------------------#

# Fixity types
vital.fixtype <- summarize(
  group_by(
    data %>% filter(!is.na(fix12m), wb_code%in%bksf$wb_code),
    date), 
  nums = n()
)

vital.fixtype <- merge(vital.fixtype,
                       summarize(
                         group_by(
                           data %>% filter(!is.na(fix12m), wb_code%in%bksf$wb_code),
                           date, fixtype), 
                         numstype = n()
                         ),
                       by = "date")

vital.fixtype$pcttype <- vital.fixtype$numstype/vital.fixtype$nums


# Price fixity over time 
pdf(file="FIG6-fixity.pdf",width=6.5,height=4.5)
ggplot(vital.fixtype) + 
  geom_area(aes(x=date, y=pcttype, fill=fixtype)) + 
  labs(x="\n Year", y="Price type share \n (by month)") + 
  scale_x_date(breaks = seq(as.Date("2003-01-01"), as.Date("2015-06-01"),
                            "1 year"), 
               labels = date_format("%Y")) + 
  scale_y_continuous(breaks=seq(0,1,.25),
                     labels=percent) + 
  scale_fill_grey() + 
  annotate("text", 
           x = as.Date("2004-01-01"), 
           y = c(0.15,0.42,0.7), 
           label = c("Fixed-price states",
                     "Transition-price states","Variable-price states"), 
           hjust=0, 
           col = c("white","black","black"),
           size = 2.75) + 
  coord_cartesian(xlim=c(as.Date("2003-01-01"),as.Date("2015-06-01"))) +
  theme(legend.position="none",
        panel.grid.major = element_blank(), 
        panel.grid.minor = element_blank(),
        panel.background = element_blank(),
        axis.line.x = element_line(colour = 'black', size=0.5, linetype = 'solid'), 
        axis.line.y = element_line(colour = 'black', size=0.5, linetype = 'solid'), 
        axis.text.x = element_text(size=8), 
        axis.text.y = element_text(size=8), 
        title = element_text(size=9))
dev.off()





#------------------------------------------------------------------------------#
#   Table 1                                                                    #
#   Changes in net taxes in the 20 largest petroleum-based CO2 emitters        #
#------------------------------------------------------------------------------#

# Restricting to top 20 emitters
emitters <- bmgplot %>% 
  select(wb_code, country.short, bmgap03, bmgap15, change, emissions) %>% 
  arrange(desc(emissions))
emitters <- emitters[1:20,]

# Table of top 20 emitters for LaTeX file
xtable(emitters %>% select(-wb_code),digits = c(0,0,2,2,2,0))





#------------------------------------------------------------------------------#
#   Metrics and statistics referenced in the main text                         #
#------------------------------------------------------------------------------#

table(bmgplot$change>0)
# FALSE  TRUE 
# 46    83 
#
# Change in net taxes between 1st half 2015 and 1st half 2003:
#   Net gasoline taxes rose in 83 states, dropped in 46 


# Change in unweighted and gasoline consumption-weigthed taxes 
# from 1st half 2003 to 1st half 2015
sixmo <- summarize(
  group_by(wgt, bookends), 
  bench = mean(benchmark), 
  tax = mean(meantax), 
  wgttax = mean(wgtmeantax),
  pctbm = mean(meanpctbm), 
  wgtpctbm = mean(wgtmeanpctbm)
)


print(sixmo)
# Source: local data frame [3 x 4]
# 
# bookends     bench       tax    wgttax
# (chr)     (dbl)     (dbl)     (dbl)
# 1    Begin 0.3954951 0.4284975 0.2787459
# 2      End 0.5632661 0.5088344 0.2417648
# 3  Neither 0.7286629 0.5005260 0.2582424
#
# Unweighted tax rose from US$0.428 to $0.509 per liter
# Weighted tax rose from $0.279 to $0.242 per liter


print((sixmo$tax[2]/sixmo$tax[1])^(1/12) - 1)
# [1] 0.01442284
# the unweighted mean tax increases at a CAGR of 1.44 percent


print((sixmo$wgttax[2]/sixmo$wgttax[1])^(1/12) - 1)
# [1] -0.01179119
# the consumption-weighted mean tax declines at a CAGR of 1.18 percent


print(c(sixmo$pctbm[1], sixmo$pctbm[2]))
# [1] 2.099612 1.915888
# As proportion of benchmark, unweighted tax declines from 210% to 192%


print(c(sixmo$wgtpctbm[1], sixmo$wgtpctbm[2]))
# [1] 1.716061 1.435728
# As proportion of benchmark, weighted tax declines from 172% to 144%


table(table(data$wb_code[data$bmgap2015adj<0&data$date!=as.Date("2005-09-01")])>12)
# FALSE  TRUE 
# 13    33 
#
# 33 countries were below the benchmark, and hence net subsidizers for at least 
# one year between 2003 and 2015.
#   (this excludes the benchmark price-spike in September 2005 as a result of 
#   Hurricane Katrina )


summarize(group_by(data %>% filter(!is.na(bmgap2015adj)),wb_code), 
          nums = n(), 
          subsidizer = sum(bmgap2015adj<0), 
          frac = subsidizer/nums) %>% 
  filter(frac==1)
# 9 countries were below the benchmark, and hence net subsidizers for the 
# entire period from 2003 to 2015: 
#   BHR, DZA, EGY, IRN, KWT, LBY, QAT, SAU, VEN





#------------------------------------------------------------------------------#
#   Supplementary Information                                                  #
#------------------------------------------------------------------------------#

#------------------------------------------------------------------------------#
#   Supplementary Figures 1, 2, 3                                              #
#   Global average implicit taxes using WTI-based gasoline benchmark prices    #
#------------------------------------------------------------------------------#

# Restricting to countries with price data in Jan 2003 and Jan 2015
wgtGULF <- summarize(
  group_by(
    data %>% filter(wb_code %in% bksf$wb_code), 
    date), 
  meanprice = mean(price_usd_2015, na.rm=T), 
  weightedmeanprice = sum(conspct*price_usd_2015, na.rm=T), 
  benchmark = median(benchmarkGULF_2015_adj,na.rm=T), 
  check = sum(conspct, na.rm=T), 
  meantax = meanprice - benchmark, 
  wgtmeantax = weightedmeanprice - benchmark, 
  meanpctbm = mean(pctbmgapGULF2015adj, na.rm=T), 
  wgtmeanpctbm = sum(conspct*pctbmgapGULF2015adj, na.rm=T), 
  bookends = unique(bookends), 
  year = unique(year)
)

# First six months of 2003 vs first six months of 2015
sixmo.sens <- summarize(
  group_by(wgtGULF,bookends), 
  bench = mean(benchmark), 
  tax = mean(meantax), 
  wgttax = mean(wgtmeantax), 
  pctbm = mean(meanpctbm), 
  wgtpctbm = mean(wgtmeanpctbm)
)

sixmo.sens
sixmo.sens$tax[2]/sixmo.sens$tax[1] - 1
sixmo.sens$wgttax[2]/sixmo.sens$wgttax[1] - 1
# Main text: Mean increased 18.7%, weighted mean dropped 13.3% 
# Now with alternative benchmark:
# Mean tax increased 22.5\%, weight mean dropped 7.4\%

sixmo.sens$bench[2]/sixmo.sens$bench[1] - 1
# Benchmark increased 38.3%

print(c(sixmo.sens$pctbm[1], sixmo.sens$pctbm[2]))
# Unweighted decline from 210\% in 1st half of 2003 to 198\% in 1st half of 2015. 

print(c(sixmo.sens$wgtpctbm[1], sixmo.sens$wgtpctbm[2]))
# Weighted decline from 172\% in 1st half of 2003 to 149\% in 1st half of 2015. 

# CAGR
(sixmo.sens$wgttax[2]/sixmo.sens$wgttax[1])^(1/12) - 1
# Weighted tax CAGR decline of 0.6\%
(sixmo.sens$wgtpctbm[2]/sixmo.sens$wgtpctbm[1])^(1/12) - 1
# Weighted pctbm CAGR decline of 1.2\%


# Supplementary Figure 1
pdf(file = "SUPPFIG1.pdf",width=6.5,height=4.5)
ggplot(data.frame(wgtGULF)) + 
  geom_line(aes(x=date,y=meantax),col="skyblue",size=0.75) + 
  labs(x="\n Year",
       y="Distance between benchmark and local price \n (constant 2015 USD per liter)") +
  scale_x_date(breaks = date_breaks("1 year"),
               minor_breaks = date_breaks("6 months"),
               labels = date_format("%Y"),
               limits = c(as.Date("2003-01-01"), as.Date("2015-10-01"))) +
  scale_y_continuous(breaks=seq(-0.1,0.8,0.1),limits=c(-0.1,0.75)) + 
  geom_line(aes(x=date,y=wgtmeantax),col="forestgreen",size=0.75) + 
  geom_hline(yintercept = 0, linetype="dashed",size=0.5,alpha=0.5) + 
  annotate("text",x=as.Date("2012-12-01"),y=c(0.65,0.16),
           label=c("Global mean","Global weighted mean"),size=2.75) +
  theme(panel.grid.major = element_blank(), 
        panel.grid.minor = element_blank(),
        panel.background = element_blank(),
        axis.line.x = element_line(colour = 'black', size=0.5, linetype = 'solid'), 
        axis.line.y = element_line(colour = 'black', size=0.5, linetype = 'solid'), 
        axis.text.x = element_text(size=8), 
        axis.text.y = element_text(size=8), 
        title = element_text(size=9))
dev.off()


# Supplementary Figure 2
pdf(file = "SUPPFIG2.pdf",width=6.5,height=4.5)
ggplot(data.frame(wgtGULF)) + 
  geom_line(aes(x=date,y=meanpctbm),col="skyblue",size=0.75) + 
  labs(x="\n Year",y="Local price as percentage of benchmark price \n") + 
  scale_x_date(breaks = date_breaks("1 year"),
               minor_breaks = date_breaks("6 months"),
               labels = date_format("%Y"),
               limits = c(as.Date("2003-01-01"), as.Date("2015-10-01"))) + 
  scale_y_continuous(breaks=seq(0,6,0.5),labels=percent) +
  geom_line(aes(x=date,y=wgtmeanpctbm),col="forestgreen",size=0.75) + 
  geom_hline(yintercept=1,size=0.5,alpha=0.5,linetype="dashed") + 
  annotate("text",x=as.Date("2012-12-01"),y=c(1.9,1.1),
           label=c("Global mean","Global weighted mean"),size=2.75) +
  theme(panel.grid.major = element_blank(), 
        panel.grid.minor = element_blank(),
        panel.background = element_blank(),
        axis.line.x = element_line(colour = 'black', size=0.5, linetype = 'solid'), 
        axis.line.y = element_line(colour = 'black', size=0.5, linetype = 'solid'), 
        axis.text.x = element_text(size=8), 
        axis.text.y = element_text(size=8), 
        title = element_text(size=9))
dev.off()


# Supplementary Figure 3
pdf(file = "SUPPFIG3.pdf",width=6.5,height=4.5)
ggplot(data.frame(wgt)) + 
  geom_line(aes(x=date,y=meanpctbm),col="skyblue",size=0.75) +
  labs(x="\n Year",y="\n Local price as percentage of benchmark price \n") + 
  scale_x_date(breaks = date_breaks("1 year"),minor_breaks=date_breaks("6 months"),
               labels = date_format("%Y"),
               limits=c(as.Date("2003-01-01"),as.Date("2015-10-01"))) + 
  scale_y_continuous(breaks=seq(0,6,0.5),labels=percent) + 
  geom_line(aes(x=date,y=wgtmeanpctbm),col="forestgreen",size=0.75) + 
  geom_hline(yintercept=1,size=0.5,alpha=0.5,linetype="dashed") + 
  annotate("text",x=as.Date("2012-12-01"),y=c(1.75,1.1),
           label=c("Global mean","Global weighted mean"),size=2.75) +
  theme(panel.grid.major = element_blank(), 
        panel.grid.minor = element_blank(),
        panel.background = element_blank(),
        axis.line.x = element_line(colour = 'black', size=0.5, linetype = 'solid'), 
        axis.line.y = element_line(colour = 'black', size=0.5, linetype = 'solid'), 
        axis.text.x = element_text(size=8), 
        axis.text.y = element_text(size=8), 
        title = element_text(size=9))
dev.off()





#------------------------------------------------------------------------------#
#   Supplementary Figure 4                                                     #
#   Top gasoline consumers                                                     #
#------------------------------------------------------------------------------#

# Consumption changes
conschange.0 <- data.frame(wb_code = data$wb_code[data$year==2003], 
                           country.short = data$country_short[data$year==2003],
                           bmgap03 = data$bmgap2015adj[data$year==2003], 
                           bmgap12 = data$bmgap2015adj[data$year==2012], 
                           cons03 = data$gasolinecons[data$year==2003], 
                           cons12 = data$gasolinecons[data$year==2012], 
                           w03 = data$conspct[data$year==2003], 
                           w12 = data$conspct[data$year==2012])

conschange <- summarize(group_by(conschange.0,wb_code), 
                        bmgap2003 = mean(bmgap03,na.rm=T), 
                        bmgap2012 = mean(bmgap12,na.rm=T), 
                        cons2003 = mean(cons03,na.rm=T), 
                        cons2012 = mean(cons12,na.rm=T), 
                        w2003 = mean(w03,na.rm=T), 
                        w2012 = mean(w12,na.rm=T),
                        increase = (bmgap2012>bmgap2003), 
                        bmchange = bmgap2012 - bmgap2003, 
                        conschange = cons2012-cons2003,
                        country.short = unique(country.short,na.rm=T))

# Getting mean price for first half 2015
meanprice2015 <- summarize(group_by(data %>% filter(bookends=="End"),wb_code), 
                           price = mean(price_usd_2015, na.rm=T), 
                           benchmean = mean(benchmark_2015_adj, na.rm=T))
conschange <- merge(conschange, meanprice2015, by = "wb_code", all.x = T, all.y = F)

# Top 20
conschangetop20 <- conschange %>% arrange(desc(cons2012))
conschangetop20 <- conschangetop20[1:20,]

# Change in consumption vs mean price
pdf(file="SUPPFIG4.pdf",width=6.5,height=6)
ggplot(conschangetop20, aes(x=conschange, y=price, size=cons2012)) + 
  geom_hline(yintercept=conschangetop20$benchmean,
             linetype="longdash",col="gray") + 
  geom_vline(xintercept=0,linetype="longdash",col="gray") + 
  geom_point(col="#0072B2") + 
  geom_text_repel(aes(label=country.short),size=2.75,
                  point.padding = unit(0.5, "lines")) + 
  scale_size(range=c(1,6),
             name="Gasoline consumption in 2012",
             breaks=c(500,2000,7000)) + 
  scale_y_continuous(breaks=seq(0,2,0.5)) + 
  labs(y="Retail gasoline price in first half 2015 \n ($/L)",
       x="\n Change in gasoline consumption from 2003 to 2012 \n (thousand barrels per day annual average)") + 
  theme(legend.position="bottom",
        panel.grid.major = element_blank(), 
        panel.grid.minor = element_blank(),
        panel.background = element_blank(),
        axis.line.x = element_line(colour = 'black', size=0.5, linetype = 'solid'), 
        axis.line.y = element_line(colour = 'black', size=0.5, linetype = 'solid'), 
        axis.text.x = element_text(size=8), 
        axis.text.y = element_text(size=8), 
        title = element_text(size=9))
dev.off()





#------------------------------------------------------------------------------#
#   Supplementary Table 2                                                      #
#   Country list                                                               #
#------------------------------------------------------------------------------#

# Full country list for Appendix using short country names
fulllist <- data %>% 
  select(wb_code,country_short,date,price)

fulllist$country_short[fulllist$wb_code=="MMR"] <- "Myanmar"

listtable <- summarize(
  group_by(fulllist %>% filter(!is.na(price)),country_short), 
  minyear = min(date), 
  maxyear = max(date), 
  nums = n())

listtable$minyear <- format(listtable$minyear, format="%b %Y")
listtable$maxyear <- format(listtable$maxyear, format="%b %Y")

print(xtable(listtable), include.rownames=FALSE)

